1 Introduction

Here, we will annotate the cells of the patient with id “365”.

2 Pre-processing

2.1 Load packages

library(Seurat)
library(SeuratWrappers)
library(harmony)
library(tidyverse)

2.2 Define parameters

# Paths
path_to_obj <- here::here("results/R_objects/5.seurat_clustered_365.rds")
path_to_obj_sampl_artifacts <- here::here("results/R_objects/cll_seurat_annotated.rds")
path_to_save <- here::here("results/R_objects/6.seurat_annotated_365.rds")
path_to_save_markers <- here::here("3-clustering_and_annotation/markers_clusters_365.rds")


# Colors
color_palette <- c("black", "gray", "red", "yellow", "violet", "green4",
                   "blue", "mediumorchid2", "coral2", "blueviolet",
                   "indianred4", "deepskyblue1", "dimgray", "deeppink1",
                   "green", "lightgray", "hotpink1")


# Source functions
source(here::here("bin/utils.R"))


# Thresholds
min_log2FC <- 0.3
alpha <- 0.001

2.3 Load data

seurat <- readRDS(path_to_obj)
seurat
## An object of class Seurat 
## 23326 features across 4685 samples within 1 assay 
## Active assay: RNA (23326 features, 2000 variable features)
##  3 dimensional reductions calculated: pca, harmony, umap
DimPlot(seurat, cols = color_palette)

seurat_bias <- readRDS(path_to_obj_sampl_artifacts)

3 Cell Cycle Score

As upregulation of cell cycle genes is a hallmark of Richter transformation, we will infer the cell cycle score and phase for each cell:

DimPlot(seurat, group.by = "time_point")

FeaturePlot(seurat, "CD3D")

seurat <- CellCycleScoring(
  seurat,
  s.features = cc.genes.updated.2019$s.genes,
  g2m.features = cc.genes.updated.2019$g2m.genes,
  set.ident = FALSE
)
DimPlot(seurat, group.by = "Phase")

umap_s_score <- FeaturePlot(seurat, features = "S.Score") +
  scale_color_viridis_c(option = "magma") +
  labs(title = "S Score") +
  theme(
    plot.title = element_text(hjust = 0.5, size = 12, face = "plain"),
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.line = element_blank()
  )
umap_g2m_score <- FeaturePlot(seurat, features = "G2M.Score") +
  scale_color_viridis_c(option = "magma") +
  labs(title = "G2M Score") +
  theme(
    plot.title = element_text(hjust = 0.5, size = 12, face = "plain"),
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.line = element_blank()
  )
umap_cc_combined <- ggpubr::ggarrange(
  plotlist = list(umap_s_score, umap_g2m_score),
  nrow = 2,
  ncol = 1,
  common.legend = FALSE
)
umap_cc_combined

4 Find Markers

markers <- FindAllMarkers(seurat, only.pos = TRUE, logfc.threshold = min_log2FC)
markers <- markers %>%
  mutate(cluster = as.character(cluster)) %>%
  filter(p_val_adj < alpha) %>%
  arrange(cluster) %>%
  group_by(cluster) %>%
  arrange(desc(avg_log2FC), .by_group = TRUE)
DT::datatable(markers)

5 Annotation

Important literature to annotate the cells:

Cluster Markers Annotation
0 CXCR4 CLL-like
1_0 CD27 CXCR4loCD27hi
1_1 MIR155HG MIR155HGhi
1_2 ENO1, TCL1A RT-like quiescent
1_3 PCNA, TOP2A RT-like proliferative

Annotate:

seurat$annotation_final <- factor(
  seurat$final_clusters,
  levels = c("0", "1_0", "1_1", "1_2", "1_3")
)
new_levels_365 <- c("CLL-like", "CXCR4loCD27hi", "MIR155HGhi",
                    "RT-like quiescent", "RT-like proliferative")
levels(seurat$annotation_final) <- new_levels_365
reordered_levels_365 <- new_levels_365
seurat$annotation_final <- factor(seurat$annotation_final, reordered_levels_365)
Idents(seurat) <- "annotation_final"


# Plot UMAP
cols <- c("#cfc9c9", "#808080", "#333333", "#d65c7a", "#813151")
names(cols) <- levels(seurat$annotation_final)
umap_annotation <- DimPlot(seurat, pt.size = 0.5)
col_labels <- c(
  "CLL-like" = bquote("CLL-like"),
  "CXCR4loCD27hi" = bquote(CXCR4^lo~CD27^hi),
  "MIR155HGhi" = bquote(MIR155HG^hi),
  "RT-like quiescent" = bquote("RT-like quiescent"),
  "RT-like proliferative" = bquote("RT-like proliferative")
)
umap_annotation <- umap_annotation +
  scale_color_manual(values = cols, breaks = names(cols), labels = col_labels) +
  theme(
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.line = element_blank()
  )
umap_annotation

6 Visualize markers

# UMAPs
genes_interest <- c("CXCR4", "CD27", "MIR155HG", "ENO1", "TCL1A",
                    "PCNA", "TOP2A")
feature_plots <- purrr::map(genes_interest, function(x) {
  p <- FeaturePlot(seurat, x, pt.size = 0.5) +
    scale_color_viridis_c(option = "magma")
  p
})
 


# Dot plots
dot_plot <- DotPlot(seurat, features = rev(genes_interest)) +
  coord_flip() +
  scale_color_viridis_c(option = "magma") +
  scale_y_discrete(breaks = names(col_labels), labels = col_labels) +
  theme(
    axis.title = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
    legend.title = element_text(size = 12)
  )
dot_plot

# Violin plots
vln_plot_s <- seurat@meta.data %>%
  ggplot(aes(annotation_final, S.Score)) +
    geom_violin(fill = "gray") +
    labs(x = "", y = "S Phase Score") +
    scale_x_discrete(breaks = names(col_labels), labels = col_labels) +
    theme_bw() +
    theme(axis.text.x = element_text(color = "black", angle = 45, vjust = 1, hjust = 1, size = 11))
vln_plot_s

vln_plot_g2m <- seurat@meta.data %>%
  ggplot(aes(annotation_final, G2M.Score)) +
    geom_violin(fill = "gray") +
    labs(x = "", y = "G2M Phase Score") +
    scale_x_discrete(breaks = names(col_labels), labels = col_labels) +
    theme_bw() +
    theme(axis.text.x = element_text(color = "black", angle = 45, vjust = 1, hjust = 1, size = 11))
vln_plot_g2m

7 Validation 1: CLL-specific clustering

Although we could recover the major clusters that we have observed in previous patients, it is intriguing that the clusters of MIR155HGhi and CXCR4loCD27hi were found in the RT time-point and not in the CLL. Thus, let us perform a CLL-specific clustering:

seurat_cll <- subset(seurat, time_point == "T2")
seurat_cll <- process_seurat(seurat_cll, dims = 1:20)
feature_plots2 <- purrr::map(c("CXCR4", "CD27", "MIR155HG", "S100A4"), function(x) {
  p <- FeaturePlot(seurat_cll, x, pt.size = 0.5) +
    scale_color_viridis_c(option = "magma")
  p
})
feature_plots2
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

Indeed, we see that CXCR4, CD27 and MIR155HG are mutually exclusive, but we cannot distinguish 3 clear clusters

seurat_cll <- FindNeighbors(seurat_cll, dims = 1:20, reduction = "pca")
seurat_cll <- FindClusters(seurat_cll, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 4119
## Number of edges: 134238
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6852
## Number of communities: 4
## Elapsed time: 0 seconds
DimPlot(seurat_cll)

8 Validation 2: sampling artifacts

Could this pattern be explained by a differential processing time between both samples (RT and CLL)?

seurat_bias <- subset(seurat_bias, temperature == "RT")
seurat_bias <- subset(seurat_bias, time == "24h")
DimPlot(seurat_bias)

seurat_bias <- SplitObject(seurat_bias, "cell_type")
seurat_bias <- seurat_bias[c("CLL 1472", "CLL 1892")]
seurat_bias <- purrr::map(seurat_bias, process_seurat, dims = 1:20)
DimPlot(seurat_bias$`CLL 1472`, group.by = "time")

FeaturePlot(seurat_bias$`CLL 1472`, c("CXCR4", "CD27", "MIR155HG"))

DimPlot(seurat_bias$`CLL 1892`, group.by = "time")

FeaturePlot(seurat_bias$`CLL 1892`, c("CXCR4", "CD27", "MIR155HG"))

9 Save

# Save Seurat object
saveRDS(seurat, path_to_save)


# Save markers
markers$annotation <- factor(markers$cluster)
levels(markers$annotation) <- new_levels_365
markers_list <- purrr::map(levels(markers$annotation), function(x) {
  df <- markers[markers$annotation == x, ]
  df <- df[, c(7, 1, 5, 2:4, 6, 8)]
  df
})
names(markers_list) <- levels(markers$annotation)
markers_list <- markers_list[reordered_levels_365]
markers_final <- bind_rows(markers_list)
saveRDS(markers_list, path_to_save_markers)
saveRDS(
  markers_final,
  here::here("results/tables/markers/markers_annotated_clusters_patient_365.rds")
)
saveRDS(markers_list, path_to_save_markers)
openxlsx::write.xlsx(
  x = markers_list,
  file = "results/tables/markers/markers_annotated_clusters_patient_365.xlsx"
)

10 Session Info

sessionInfo()
## R version 4.0.4 (2021-02-15)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=es_ES.UTF-8        LC_COLLATE=en_US.UTF-8     LC_MONETARY=es_ES.UTF-8    LC_MESSAGES=en_US.UTF-8    LC_PAPER=es_ES.UTF-8       LC_NAME=C                  LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] forcats_0.5.1        stringr_1.4.0        dplyr_1.0.6          purrr_0.3.4          readr_1.4.0          tidyr_1.1.3          tibble_3.1.2         ggplot2_3.3.3        tidyverse_1.3.1      harmony_1.0          Rcpp_1.0.6           SeuratWrappers_0.3.0 SeuratObject_4.0.2   Seurat_4.0.3         BiocStyle_2.18.1    
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1          backports_1.2.1       plyr_1.8.6            igraph_1.2.6          lazyeval_0.2.2        splines_4.0.4         crosstalk_1.1.1       listenv_0.8.0         scattermore_0.7       digest_0.6.27         htmltools_0.5.1.1     fansi_0.5.0           magrittr_2.0.1        tensor_1.5            cluster_2.1.1         ROCR_1.0-11           limma_3.46.0          openxlsx_4.2.3        remotes_2.4.0         globals_0.14.0        modelr_0.1.8          matrixStats_0.59.0    spatstat.sparse_2.0-0 colorspace_2.0-1      rvest_1.0.0           ggrepel_0.9.1         haven_2.4.1           xfun_0.23             crayon_1.4.1          jsonlite_1.7.2        spatstat.data_2.1-0   survival_3.2-10       zoo_1.8-9             glue_1.4.2            polyclip_1.10-0       gtable_0.3.0          leiden_0.3.8          car_3.0-10            future.apply_1.7.0    abind_1.4-5           scales_1.1.1          DBI_1.1.1             rstatix_0.7.0         miniUI_0.1.1.1        viridisLite_0.4.0     xtable_1.8-4          reticulate_1.20       spatstat.core_2.1-2   foreign_0.8-81        rsvd_1.0.5            DT_0.18               htmlwidgets_1.5.3     httr_1.4.2            RColorBrewer_1.1-2   
##  [55] ellipsis_0.3.2        ica_1.0-2             farver_2.1.0          pkgconfig_2.0.3       sass_0.4.0            uwot_0.1.10           dbplyr_2.1.1          deldir_0.2-10         here_1.0.1            utf8_1.2.1            labeling_0.4.2        tidyselect_1.1.1      rlang_0.4.11          reshape2_1.4.4        later_1.2.0           munsell_0.5.0         cellranger_1.1.0      tools_4.0.4           cli_2.5.0             generics_0.1.0        broom_0.7.7           ggridges_0.5.3        evaluate_0.14         fastmap_1.1.0         yaml_2.2.1            goftest_1.2-2         knitr_1.33            fs_1.5.0              fitdistrplus_1.1-5    zip_2.2.0             RANN_2.6.1            pbapply_1.4-3         future_1.21.0         nlme_3.1-152          mime_0.10             xml2_1.3.2            compiler_4.0.4        rstudioapi_0.13       curl_4.3.1            plotly_4.9.4          png_0.1-7             ggsignif_0.6.2        spatstat.utils_2.2-0  reprex_2.0.0          bslib_0.2.5.1         stringi_1.6.2         highr_0.9             RSpectra_0.16-0       lattice_0.20-41       Matrix_1.3-4          vctrs_0.3.8           pillar_1.6.1          lifecycle_1.0.0       BiocManager_1.30.15  
## [109] spatstat.geom_2.1-0   lmtest_0.9-38         jquerylib_0.1.4       RcppAnnoy_0.0.18      data.table_1.14.0     cowplot_1.1.1         irlba_2.3.3           httpuv_1.6.1          patchwork_1.1.1       R6_2.5.0              bookdown_0.22         promises_1.2.0.1      rio_0.5.26            KernSmooth_2.23-18    gridExtra_2.3         parallelly_1.26.0     codetools_0.2-18      MASS_7.3-53.1         assertthat_0.2.1      rprojroot_2.0.2       withr_2.4.2           sctransform_0.3.2     mgcv_1.8-36           parallel_4.0.4        hms_1.1.0             grid_4.0.4            rpart_4.1-15          rmarkdown_2.8         carData_3.0-4         Rtsne_0.15            ggpubr_0.4.0          shiny_1.6.0           lubridate_1.7.10